import os.path
import time
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

# Query URL for the MCA (Ministry of Civil Affairs) administrative-division
# lookup. The query-string values are GBK percent-encoded Chinese text
# (e.g. %B1%B1%BE%A9%CA%D0 = "北京市") — presumably Beijing / Dongcheng
# district; the site does not accept UTF-8 here, so keep them as-is.
state_url = "http://xzqh.mca.gov.cn/defaultQuery?shengji=%B1%B1%BE%A9%CA%D0%A3%A8%BE%A9%A3%A9&diji=%B1%B1%BE%A9%CA%D0&xianji=%B6%AB%B3%C7%C7%F8"
# Browser-like request headers. NOTE(review): the Cookie value is a
# hard-coded session snapshot — it will expire; confirm whether the site
# actually requires it or whether the request works without a cookie.
stateHeaders = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cache-Control": "no-cache",
    "Connection": "keep-alive",
    "Cookie": "JSESSIONID=92DA776C649BB46D6E47F5BFB136BE28; _gscbrs_266093035=1; _gscu_266093035=00718425uewfr384; Hm_lvt_37390caebda63ff110684bb975115c93=1700718428; Hm_lpvt_37390caebda63ff110684bb975115c93=1700718684",
    "Host": "xzqh.mca.gov.cn",
    "Pragma": "no-cache",
    "Referer": "http://xzqh.mca.gov.cn/defaultQuery?shengji=%B1%B1%BE%A9%CA%D0%A3%A8%BE%A9%A3%A9&diji=%B1%B1%BE%A9%CA%D0&xianji=%B4%F3%D0%CB%C7%F8",
    "Upgrade-Insecure-Requests": "1",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36"
}
# Fetch the division-query page and decode it with the server's detected
# encoding (the site serves GBK, not UTF-8, so apparent_encoding is needed
# to avoid mojibake in the parsed text).
response = requests.get(state_url, headers=stateHeaders)
response.encoding = response.apparent_encoding
soup = BeautifulSoup(response.text, 'html.parser')

# The last table with class "select_table" holds a hidden <input> whose
# "value" attribute is a JSON array of region/code records.
results = soup.find_all('table', class_='select_table')[-1]
records = json.loads(results.find('input').get('value'))

file_name = '民政部门行政区域以及代码数据.csv'
dir_name = 'state'
# makedirs(exist_ok=True) is race-free, unlike exists()+mkdir().
os.makedirs(dir_name, exist_ok=True)
file_path = os.path.join(dir_name, file_name)

# Bug fix: the original iterated over every record, re-creating the same
# DataFrame and rewriting the identical CSV once per record (with a 3 s
# sleep each time). A single write of the full record list produces the
# same file without the redundant work.
# utf-8-sig adds a BOM so Excel opens the Chinese headers correctly.
pd.DataFrame(records).to_csv(file_path, index=False, encoding='utf-8-sig')
